% Metadata (BibTeX record)
@Comment{Conference paper record: Caetano, Bremond and Schwartz, SIBGRAPI 2019.
  The citation key below contains a non-ASCII character. It is kept unchanged so
  that existing citations keep resolving; strictly 8-bit BibTeX toolchains may
  need an ASCII alias for it.
  Only one url field is given (the repository record); the publisher copy is
  reachable through the doi field, stored as a bare DOI without resolver prefix.
  The fields affiliation, conference-location, conference-year, ibi, targetfile
  and urlaccessdate are not part of the standard BibTeX data model; they are
  kept as repository metadata (urlaccessdate presumably refers to the url
  field - confirm).}
@InProceedings{CaetanoBrémSchw:2019:SkImRe,
               author = "Caetano, Carlos and Br{\'e}mond, Fran{\c{c}}ois and Schwartz, 
                         William Robson",
          affiliation = "{Universidade Federal de Minas Gerais} and INRIA and {Universidade 
                         Federal de Minas Gerais}",
                title = "Skeleton Image Representation for {3D} Action Recognition based on 
                         Tree Structure and Reference Joints",
            booktitle = "Proceedings of the 32nd Conference on Graphics, Patterns and 
                         Images ({SIBGRAPI})",
                 year = "2019",
               editor = "Oliveira, Luciano Rebou{\c{c}}as de and Sarder, Pinaki and Lage, 
                         Marcos and Sadlo, Filip",
         organization = "Conference on Graphics, Patterns and Images, 32. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "skeleton image representation, convolutional neural network (CNN), 
                         3D action recognition",
             abstract = "In the last years, the computer vision research community has 
                         studied on how to model temporal dynamics in videos to employ 3D 
                         human action recognition. To that end, two main baseline 
                         approaches have been researched: (i) Recurrent Neural Networks 
                         (RNNs) with Long-Short Term Memory (LSTM); and (ii) skeleton image 
                         representations used as input to a Convolutional Neural Network 
                         (CNN). Although RNN approaches present excellent results, such 
                         methods lack the ability to efficiently learn the spatial 
                         relations between the skeleton joints. On the other hand, the 
                         representations used to feed CNN approaches present the advantage 
                         of having the natural ability of learning structural information 
                         from 2D arrays (i.e., they learn spatial relations from the 
                         skeleton joints). To further improve such representations, we 
                         introduce the Tree Structure Reference Joints Image (TSRJI), a 
                         novel skeleton image representation to be used as input to CNNs. 
                         The proposed representation has the advantage of combining the 
                         use of reference joints and a tree structure skeleton. While the 
                         former incorporates different spatial relationships between the 
                         joints, the latter preserves important spatial relations by 
                         traversing a skeleton tree with a depth-first order algorithm. 
                         Experimental results demonstrate the effectiveness of the 
                         proposed representation for 3D action recognition on two datasets 
                         achieving state-of-the-art results on the recent NTU RGB+D~120 
                         dataset.",
  conference-location = "Rio de Janeiro, RJ, Brazil",
      conference-year = "28-31 Oct. 2019",
                  doi = "10.1109/SIBGRAPI.2019.00011",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/3U2JR42",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/3U2JR42",
           targetfile = "SIBGRAPI2019_submitted_camera_ready.pdf",
        urlaccessdate = "2024, Apr. 27"
}